import requests
from lxml import etree

# Fetch the Baidu home page and print the href/text of every link found
# directly under the <div id="s-top-left"> element.
url = 'https://www.baidu.com/'
# User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'}
# Without a timeout, requests may wait forever on an unresponsive server.
html_response = requests.get(url, headers=headers, timeout=10)
# Stop on HTTP 4xx/5xx instead of parsing an error page as if it were the
# expected document.
html_response.raise_for_status()
html_content = html_response.content.decode('utf8')
parser = etree.HTMLParser(encoding='utf8')
html = etree.HTML(html_content, parser=parser)
# Locate the links (e.g. "News") that are direct children of the
# "s-top-left" container.
a_s = html.xpath('//div[@id="s-top-left"]/a')
content_list = []
for a in a_s:
    hrefs = a.xpath('./@href')
    texts = a.xpath('./text()')
    # An anchor may lack an href attribute or a direct text node; indexing
    # the empty XPath result would raise IndexError, so skip such anchors.
    if not hrefs or not texts:
        continue
    content_list.append({
        'href': hrefs[0],
        'text': texts[0],
    })
for temp_content in content_list:
    print(temp_content)
    print('*' * 50)
